clear all
cd "C:\Users\favero\Documents\COVID-19\replication materials\PAR"

capture log close
log using reasearchnote.txt, text replace

use pedersenfaveroPAR2020.dta

*** Initial Data Cleaning ***

// same user took survey twice; deleting 2nd response
drop if ResponseId == "R_1FJTuRyeH7msWDG"

// check how many respondents fail attention checks
gen countattention = (attention1==2) + (attention2_1==0) + (attention3_1==10)
tab countattention

// Main independent variable
//rename arm arm_str
//encode arm_str, gen(arm)
//label define arm 1 "Control" 2 "Treat. 1" 3 "Treat. 2" 4 "Treat. 3" 5 "Treat. 4", replace

// Dependent variable 1
egen maxweeks = rowmean(maxweeks?)
replace maxweeks = 104 if maxweeks > 104 // windsorize 12 extreme values
la var maxweeks "# of Weeks Willing to Isolate"

// Dependent variable 2
alpha socdist?_1
gen socdist = 2*( (10-socdist1_1) + socdist2_1 + (10-socdist3_1) + socdist4_1 + socdist5_1 ) // multiply by 2 to create 100-point scale
la var socdist "Intended Social Distancing Index"

// Attitudes/Beliefs
gen threat = attitude1_1/10 // create 0-1 scale
gen reducedeath = attitude2_1/10 // create 0-1 scale
gen economy = attitude3_1/10 // create 0-1 scale
gen closebusiness = attitude4_1/10 // create 0-1 scale
la var threat "COVID biggest threat"
la var reducedeath "Prioritize reducing death"
la var economy "Keep economy going"
la var closebusiness "Close businesses"

// News consumption
gen follownews01 = (follownews-1)/3 // create 0-1 scale
la var follownews01 "Following COVID news"

// Gender
recode gender (1=0) (2=1) (-9 3=.), gen(female)
la var female "Female"
label define femalelab 0 "Male" 1 "Female"
la val female femalelab

// Age
rename prolific_age c_age
replace c_age = . if c_age > 120
recode c_age (18/24=1) (25/44=2) (45/120=3), gen(age3)
label define ages 1 "Age: < 24" 2 "Age: 25-44" 3 "Age: 45+"
la val age3 ages

// Race
egen racecount = rowtotal(race_?)
gen race5 = 5 if racecount > 0 // other
replace race5 = 1 if race_1==1 & racecount==1 // white
replace race5 = 2 if race_2==1 & racecount==1 // black
replace race5 = 3 if race_3==1 // hispanic
replace race5 = 4 if race_4==1 & racecount==1 // asian
replace race5 = . if race__9==1
la def racelab 1 "White" 2 "Black" 3 "Hispanic" 4 "Asian" 5 "Other"
la val race5 racelab

// Education
recode edu (1/2=1) (3/4=2) (5/8=3), gen(edu3) // hs diploma or less, some college/associates, bachelors+
la var edu3 "Education"
label define edulev 1 "High school or less" 2 "Some college/associates" 3 "Bachelor’s +", replace // uses the character ’ which is compatible with coefplot but not with esttab
la val edu3 edulev

// Essential worker
recode essentialworker (-9 = 2)
label define labels31 0 "Not essential worker" 1 "Essential worker" 2 "Unsure if essent. work.", replace
la var essentialworker "Essential worker"

// Partisanship
la var party "Party"
label define labels11 1 "Republican" 2 "Democrat" 3 "Other Party", replace

// Prosocial motivation
gen prosoc = (motiv2 + motiv3 + motiv4 + motiv5 - 4)/16 // create 0-1 scale
alpha motiv2 motiv3 motiv4 motiv5
la var prosoc "Prosocial motivation"

// Emapthy
gen empathy = (motiv1-1)/4 // create 0-1 scale
la var empathy "Empathy"

// News sources
foreach var of varlist sourcenews_? {
	replace `var' = 0 if `var' == .
}
la var sourcenews_1 "Newspapers"
la var sourcenews_2 "Magazines"
la var sourcenews_3 "TV"
la var sourcenews_4 "Radio"
la var sourcenews_5 "Websites"
la var sourcenews_6 "Social Media"

// Experimental group (for messaging survey not analyzed in this piece)
rename arm arm_str
encode arm_str, gen(arm)
label define arm 1 "Control" 2 "Treat. 1" 3 "Treat. 2" 4 "Treat. 3" 5 "Treat. 4", replace

// create macros containing variable lists to future reference
global mainvars socdist maxweeks follownews01 threat reducedeath economy closebusiness female age3 race5 edu3 essentialworker party prosoc empathy sourcenews_1 sourcenews_2 sourcenews_3 sourcenews_4 sourcenews_5 sourcenews_6
global ivs1 female b1.age3 b1.race5 b1.edu3 b0.essentialworker
global ivs2 b1.party prosoc empathy sourcenews_1 sourcenews_2 sourcenews_3 sourcenews_4 sourcenews_5 sourcenews_6
global ivs3 follownews01 threat reducedeath economy closebusiness

foreach var of varlist $mainvars {
	drop if `var' == .
}


*** Poststratification Raking ***

gen _one = 1
gen weight = 1

describe, short
local n = r(N)

matrix ACS_age3 = (0.121*`n', 0.342*`n', 0.537*`n')
matrix colnames ACS_age3 = 1 2 3
matrix coleq ACS_age3 = _one
matrix rownames ACS_age3 = age3

matrix ACS_female = ((1-0.513)*`n', 0.513*`n')
matrix colnames ACS_female = 0 1
matrix coleq ACS_female = _one
matrix rownames ACS_female = female

matrix ACS_race5 = (0.632*`n', 0.122*`n', 0.163*`n', 0.060*`n', (1-0.632-0.122-0.163-0.060)*`n') // other white black hispanic asian
matrix colnames ACS_race5 = 1 2 3 4 5
matrix coleq ACS_race5 = _one
matrix rownames ACS_race5 = race5

matrix ACS_edu3 = (0.393*`n', 0.307*`n', 0.300*`n')
matrix colnames ACS_edu3 = 1 2 3
matrix coleq ACS_edu3 = _one
matrix rownames ACS_edu3 = edu3

ipfraking [pw=weight], replace ctotal(ACS_age3 ACS_female ACS_race5 ACS_edu3) trimloabs(.5) trimhiabs(2) trimfrequenc(once)
svyset [pw=weight]

*** Descriptive stats ***

svy: mean socdist socdist?_1 maxweeks $ivs3
estat sd
sum socdist socdist?_1 maxweeks $ivs3 // not using weights (needed for min-max values)
_pctile maxweeks [pweight=weight], p(50)
return list

svy: mean i.female i.age3 i.race5 i.edu3 i.essentialworker i.party prosoc empathy sourcenews*
estat sd
sum i.female i.age3 i.race5 i.edu3 i.essentialworker i.party prosoc empathy sourcenews* // not using weights (needed for min-max values)

*** Analysis ***
svy: reg socdist $ivs1 b1.arm
	estimates store dv1_1, title("Behavior Index")
	estadd local weights "Yes"
svy: reg socdist $ivs1 $ivs2 b1.arm
	estimates store dv1_2, title("Behavior Index")
	estadd local weights "Yes"
	test 2.party = 3.party // check if Dems differ significantly from Other Party
svy: reg socdist $ivs1 $ivs2 $ivs3 b1.arm
	estimates store dv1_3, title("Behavior Index")
	estadd local weights "Yes"
reg socdist $ivs1 $ivs2 $ivs3 , robust // robustness check: no weights, no controls for experimental treatments
	estimates store dv1_4, title("Behavior Index")
	estadd local weights "No"

svy: reg maxweeks $ivs1 b1.arm
	estimates store dv2_1, title("Duration")
	estadd local weights "Yes"
svy: reg maxweeks $ivs1 $ivs2 b1.arm
	estimates store dv2_2, title("Duration")
	estadd local weights "Yes"
svy: reg maxweeks $ivs1 $ivs2 $ivs3 b1.arm
	estimates store dv2_3, title("Duration")
	estadd local weights "Yes"
reg maxweeks $ivs1 $ivs2 $ivs3 , robust // robustness check: no weights, no controls for experimental treatments
	estimates store dv2_4, title("Duration")
	estadd local weights "No"

coefplot dv1_3, bylabel(Social Dist. Behavior) ///
	|| dv2_3, bylabel(Anticipated Duration) ///
	||, drop(_cons 5.race5 2.essentialworker *.arm) ///
		headings(female = "(Male)" 2.age3 = "(Age: < 25)" 2.race5 = "(White)" 2.edu3 = "(High school or less)" 1.essentialworker = "(Not essential worker)" 2.party = "(Republican)", labsize(small)) ///
		order(female *.age3 *.race5 *.edu3 *.essentialworker *.party . prosoc empathy . sourcenews* . follownews01 . ) ///
		xline(0) levels(95 90) scheme(s1mono) ysize(5) ylabel(,labsize(small)) byopts(graphregion(margin(zero)))
graph export coefplot.png, replace width(1500)

label define edulev 1 "High school or less" 2 "Some college/associates" 3 "Bachelor's +", replace // uses the character ' which is compatible with esttab but not with coefplot

esttab dv?_?, ///
   title(Table A-2. Regression results) /// 
   mlabels(,titles) cells(b(fmt(2) star) se(par fmt(2))) ///
   legend starlevels(+ 0.10 * 0.05 ** 0.01) label ///
   stats(weights r2 N, fmt(0 3 0) label("Applies probability weights" "R-sqr" N)) ///
   order( female *.age3 *.race5 *.edu3 *.essentialworker *.party prosoc empathy sourcenews* $ivs3 ) ///
   drop(1.age3 1.race5 1.edu3 0.essentialworker 1.party 1.arm) ///
   addnotes("(two-tailed). Standard errors in parentheses (robust option used for models 4 and 8).") ///
   , using "researchnote_tables.rtf", replace

*** Robustness check: controlling for region ***

gen region = .
replace region = 1 if state==9 | state==23 | state==25 | state==33 | state==44 | state==50
replace region = 2 if state==10 | state==11 | state==24 | state==34 | state==36 | state==42
replace region = 3 if state==17 | state==18 | state==26 | state==39 | state==55
replace region = 4 if state==19 | state==20 | state==27 | state==29 | state==31 | state==38 | state==46
replace region = 5 if state==1 | state==5 | state==12 | state==13 | state==21 | state==22 | state==28 | state==37 | state==45 | state==47 | state==51 | state==54
replace region = 6 if state==4 | state==35 | state==40 | state==48
replace region = 7 if state==8 | state==16 | state==30 | state==49 | state==56
replace region = 8 if state==2 | state==6 | state==15 | state==32 | state==41 | state==53
label define regionlab 1 "New England" 2 "Mideast" 3 "Great Lakes" 4 "Plains" 5 "Southeast" 6 "Southwest" 7 "Rocky Mountain" 8 "Far West", replace
la val region regionlab

svy: reg socdist $ivs1 b8.region  b1.arm
	estimates store dv1_1, title("Behavior Index")
	estadd local weights "Yes"
svy: reg socdist $ivs1 $ivs2 b8.region  b1.arm
	estimates store dv1_2, title("Behavior Index")
	estadd local weights "Yes"
svy: reg socdist $ivs1 $ivs2 $ivs3 b8.region b1.arm
	estimates store dv1_3, title("Behavior Index")
	estadd local weights "Yes"
reg socdist $ivs1 $ivs2 $ivs3 b8.region  , robust // robustness check: no weights, no controls for experimental treatments
	estimates store dv1_4, title("Behavior Index")
	estadd local weights "No"

svy: reg maxweeks $ivs1 b8.region  b1.arm
	estimates store dv2_1, title("Duration")
	estadd local weights "Yes"
svy: reg maxweeks $ivs1 $ivs2 b8.region  b1.arm
	estimates store dv2_2, title("Duration")
	estadd local weights "Yes"
svy: reg maxweeks $ivs1 $ivs2 $ivs3 b8.region b1.arm
	estimates store dv2_3, title("Duration")
	estadd local weights "Yes"
reg maxweeks $ivs1 $ivs2 $ivs3 b8.region  , robust // robustness check: no weights, no controls for experimental treatments
	estimates store dv2_4, title("Duration")
	estadd local weights "No"

label define edulev 1 "High school or less" 2 "Some college/associates" 3 "Bachelor’s +", replace // uses the character ’ which is compatible with coefplot but not with esttab

coefplot dv1_3, bylabel(Social Dist. Behavior) ///
	|| dv2_3, bylabel(Anticipated Duration) ///
	||, drop(_cons 5.race5 2.essentialworker *.arm) ///
		headings(female = "(Male)" 2.age3 = "(Age: < 25)" 2.race5 = "(White)" 2.edu3 = "(High school or less)" 1.essentialworker = "(Not essential worker)" 2.party = "(Republican)" 2.party = "(Republican)" 1.region = "(Far West)", labsize(small)) ///
		order(female *.age3 *.race5 *.edu3 *.essentialworker *.party . prosoc empathy . sourcenews* . follownews01 . ) ///
		xline(0) levels(95 90) scheme(s1mono) ysize(5) ylabel(,labsize(small)) byopts(graphregion(margin(zero)))
graph export coefplot_with_regions.png, replace width(1500)

label define edulev 1 "High school or less" 2 "Some college/associates" 3 "Bachelor's +", replace // uses the character ' which is compatible with esttab but not with coefplot

esttab dv?_?, ///
   title(Table A-2. Regression results) /// 
   mlabels(,titles) cells(b(fmt(2) star) se(par fmt(2))) ///
   legend starlevels(+ 0.10 * 0.05 ** 0.01) label ///
   stats(weights r2 N, fmt(0 3 0) label("Applies probability weights" "R-sqr" N)) ///
   order( female *.age3 *.race5 *.edu3 *.essentialworker *.party prosoc empathy sourcenews* $ivs3 ) ///
   drop(1.age3 1.race5 1.edu3 0.essentialworker 1.party 1.arm) ///
   addnotes("(two-tailed). Standard errors in parentheses (robust option used for models 4 and 8).") ///
   , using "researchnote_tables_with_regions.rtf", replace

log close
